rm(list = ls())
setwd("C:/Users/Vagheesh Narasimhan/Dropbox/g2001")
#setwd("C:/Users/jq/Dropbox/g2001")

library(foreign)
library(sandwich)
library(lmtest)
library(car)
library(leaps)
library(Amelia)
library(Zelig)
library(MatchIt)
library(cem)
library(xtable)
library(Cairo)

# Replication R Script
# http://www.sciencedirect.com/science/article/pii/S0305750X08003112
# Keeping a Low Profile: What Determines the Allocation of Aid by Non-Governmental Organizations?
# Ryann Manning
# John Quattrochi
# Vagheesh Narasimhan

###################################################
#################### FUNCTIONS #################### 
###################################################

#function for outputting data - takes in order: an unrestricted model, restricted model, degrees of freedom, cluster vector
clx <- function(fm, fmres, dfcw, cluster){
    # R-codes (www.r-project.org) for computing
    # clustered-standard errors. Mahmood Arai, Jan 26, 2008.
	 
	# The arguments of the function are:
	# fitted model, cluster1 and cluster2
	# You need to install libraries `sandwich' and `lmtest'

	# reweighting the var-cov matrix for the within model
	library(sandwich);library(lmtest)
	M <- length(unique(cluster))   
	N <- length(cluster)           
	K <- fm$rank                        
	dfc <- (M/(M-1))*((N-1)/(N-K))  
	uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
	vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)*dfcw
	llfm <- logLik(fm)
	llfmres <- logLik(fmres)
	if (fm$family[1]=="binomial")
	{
		pr2 = 1 - llfm/llfmres
	}
	else
	{
		pr2 = 1-sum((fm$residuals)^2)/sum((fm$y-mean(fm$y))^2)
	}
	#calculate the number of observations from the residual degrees of freedom
	numobs <- fm$df.null + 1
	#calculate the clustered standard errors
	cf<-coeftest(fm, vcovCL)
	#combine the columns for the estimates and the clustered statistic
	cfdouble<-paste(cf[,1], abs(cf[,3]), sep="/")
	cfdouble<-c(cfdouble,numobs,M,pr2,llfm)
	names(cfdouble) <- c(rownames(cf),"numobs","numcountries","R2","log-likelihood")
	return(cfdouble)
	}

###################################################
#################### DATA MANIPULATION ############
###################################################	

#Original Data from Paper
#replidata <- read.dta("NGO_LSE_org4.dta")
new_data <- read.dta("NGO_LSE_org5.dta")

#Variables for table 1
vars.col1  <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log","countrycode")
vars.col2  <- c("NGOactive", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Gini", "countrycode")
vars.col3  <- c("NGOactive", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "HDI2004", "countrycode")
vars.col4  <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Gini", "HDI2004", "countrycode")
vars.col5  <- c("NGOactive", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Freedom", "countrycode")
vars.col6  <- c("NGOactive", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "law_ord", "countrycode")
vars.col7  <- c("NGOactive", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "kauf_law", "countrycode")
vars.col8  <- c("NGOactive", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "ti", "countrycode")
vars.col9  <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "law_ord", "countrycode")
vars.col10 <- c("NGOactive", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Freedom", "kauf_law", "countrycode")
vars.col11 <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "export_tot_sc", "countrycode")
vars.col12 <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "UNvoting", "countrycode")
vars.col13 <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "export_tot_sc", "UNvoting", "countrycode")
vars.col14 <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "religionmatch", "pop_log","countrycode", "otherNGOamount_log")
vars.col15 <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "religionmatch", "otherNGO", "pop_log","countrycode", "otherNGOamount_log")
vars.col16 <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO","pop_log","countrycode", "colony")
vars.col17 <- c("NGOactive", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log","countrycode", "colony")
#Variables for table 2
vars.col1t2  <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log","countrycode")
vars.col2t2  <- c("NGOactive", "NGOaid_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Gini", "countrycode")
vars.col3t2  <- c("NGOactive", "NGOaid_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "HDI2004", "countrycode")
vars.col4t2  <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Gini", "HDI2004", "countrycode")
vars.col5t2  <- c("NGOactive", "NGOaid_log", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Freedom", "countrycode")
vars.col6t2  <- c("NGOactive", "NGOaid_log", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "law_ord", "countrycode")
vars.col7t2  <- c("NGOactive", "NGOaid_log", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "kauf_law", "countrycode")
vars.col8t2  <- c("NGOactive", "NGOaid_log", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "ti", "countrycode")
vars.col9t2  <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "law_ord", "countrycode")
vars.col10t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "Freedom", "kauf_law", "countrycode")
vars.col11t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "export_tot_sc", "countrycode")
vars.col12t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "UNvoting", "countrycode")
vars.col13t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log", "export_tot_sc", "UNvoting", "countrycode")
vars.col14t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "religionmatch", "pop_log","countrycode", "otherNGOamount_log")
vars.col15t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "religionmatch", "otherNGO", "pop_log","countrycode", "otherNGOamount_log")
vars.col16t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO","pop_log","countrycode", "colony")
vars.col17t2 <- c("NGOactive", "NGOaid_log", "gdpcap_log", "polity2", "Bilaid_log", "otherNGO", "religionmatch", "pop_log","countrycode", "colony")
#New data subselected from the original data for table 1
col1data  <- replidata[,vars.col1]
col2data  <- replidata[,vars.col2]
col3data  <- replidata[,vars.col3]
col4data  <- replidata[,vars.col4]
col5data  <- replidata[,vars.col5]
col6data  <- replidata[,vars.col6]
col7data  <- replidata[,vars.col7]
col8data  <- replidata[,vars.col8]
col9data  <- replidata[,vars.col9]
col10data <- replidata[,vars.col10]
col11data <- replidata[,vars.col11]
col12data <- replidata[,vars.col12]
col13data <- replidata[,vars.col13]
col14data <- replidata[,vars.col14]
col15data <- replidata[,vars.col15]
col16data <- replidata[,vars.col16]
col17data <- replidata[,vars.col17]
#New data subselected from the original data for table 2
col1datat2  <- replidata[,vars.col1t2]
col2datat2  <- replidata[,vars.col2t2]
col3datat2  <- replidata[,vars.col3t2]
col4datat2  <- replidata[,vars.col4t2]
col5datat2  <- replidata[,vars.col5t2]
col6datat2  <- replidata[,vars.col6t2]
col7datat2  <- replidata[,vars.col7t2]
col8datat2  <- replidata[,vars.col8t2]
col9datat2  <- replidata[,vars.col9t2]
col10datat2 <- replidata[,vars.col10t2]
col11datat2 <- replidata[,vars.col11t2]
col12datat2 <- replidata[,vars.col12t2]
col13datat2 <- replidata[,vars.col13t2]
col14datat2 <- replidata[,vars.col14t2]
col15datat2 <- replidata[,vars.col15t2]
col16datat2 <- replidata[,vars.col16t2]
col17datat2 <- replidata[,vars.col17t2]
#remove NA's from the data for table 1
col1datared  <- na.omit(col1data)
col2datared  <- na.omit(col2data)
col3datared  <- na.omit(col3data)
col4datared  <- na.omit(col4data)
col5datared  <- na.omit(col5data)
col6datared  <- na.omit(col6data)
col7datared  <- na.omit(col7data)
col8datared  <- na.omit(col8data)
col9datared  <- na.omit(col9data)
col10datared <- na.omit(col10data)
col11datared <- na.omit(col11data)
col12datared <- na.omit(col12data)
col13datared <- na.omit(col13data)
col14datared <- na.omit(col14data)
col15datared <- na.omit(col15data)
col16datared <- na.omit(col16data)
col17datared <- na.omit(col17data)

#remove NA's from the data for table 2
col1dataredt2  <- na.omit(col1datat2)
col2dataredt2  <- na.omit(col2datat2)
col3dataredt2  <- na.omit(col3datat2)
col4dataredt2  <- na.omit(col4datat2)
col5dataredt2  <- na.omit(col5datat2)
col6dataredt2  <- na.omit(col6datat2)
col7dataredt2  <- na.omit(col7datat2)
col8dataredt2  <- na.omit(col8datat2)
col9dataredt2  <- na.omit(col9datat2)
col10dataredt2 <- na.omit(col10datat2)
col11dataredt2 <- na.omit(col11datat2)
col12dataredt2 <- na.omit(col12datat2)
col13dataredt2 <- na.omit(col13datat2)
col14dataredt2 <- na.omit(col14datat2)
col15dataredt2 <- na.omit(col15datat2)
col16dataredt2 <- na.omit(col16datat2)
col17dataredt2 <- na.omit(col17datat2)

#remove rows with non-active NGOs from our reduced table with the covariates of interest for table2
col1dataactivet2  <- col1dataredt2[col1dataredt2$NGOactive==1,]
col2dataactivet2  <- col2dataredt2[col2dataredt2$NGOactive==1,]
col3dataactivet2  <- col3dataredt2[col3dataredt2$NGOactive==1,]
col4dataactivet2  <- col4dataredt2[col4dataredt2$NGOactive==1,]
col5dataactivet2  <- col5dataredt2[col5dataredt2$NGOactive==1,]
col6dataactivet2  <- col6dataredt2[col6dataredt2$NGOactive==1,]
col7dataactivet2  <- col7dataredt2[col7dataredt2$NGOactive==1,]
col8dataactivet2  <- col8dataredt2[col8dataredt2$NGOactive==1,]
col9dataactivet2  <- col9dataredt2[col9dataredt2$NGOactive==1,]
col10dataactivet2 <- col10dataredt2[col10dataredt2$NGOactive==1,]
col11dataactivet2 <- col11dataredt2[col11dataredt2$NGOactive==1,]
col12dataactivet2 <- col12dataredt2[col12dataredt2$NGOactive==1,]
col13dataactivet2 <- col13dataredt2[col13dataredt2$NGOactive==1,]
col14dataactivet2 <- col14dataredt2[col14dataredt2$NGOactive==1,]
col15dataactivet2 <- col15dataredt2[col15dataredt2$NGOactive==1,]
col16dataactivet2 <- col16dataredt2[col16dataredt2$NGOactive==1,]
col17dataactivet2 <- col17dataredt2[col17dataredt2$NGOactive==1,]

###################################################
#################### MULTIPLE IMPUTATION ###################
###################################################

#Create dataset containing only variables of interest (dep and ind)
attach(new_data)
replidata2 <-as.matrix(cbind(NGOactive, NGOaid_log, gdpcap_log, polity2, Bilaid_log, otherNGO, pop_log,Gini,HDI2004,Freedom,
		law_ord,kauf_law,ti,export_tot_sc,UNvoting,otherNGOamount_log,countrycode,col_relig,religionmatch2,colony))
dim(replidata2)
detach(new_data)
replidata3 <- as.data.frame(replidata2)

#Transform export_tot_sc to log so that Amelia doesn't impute negative values
replidata4 <- replidata3
replidata4$export_ln <- log(replidata4$export_tot_sc,exp(1)) 
replidata4$export_tot_sc <- NULL

#Impute missing data
# we use the amelia() function to create the five bootstrapped
# em imputation models.  Notice the use of the cs argument 
# and ts argument to reflect the panel nature of our data.  

set.seed(12345)
a.out3 <- amelia(x = replidata4, cs="countrycode",m = 5)
summary(a.out3)

# save imputed datasets (default .csv)
write.amelia(obj=a.out3, file.stem = "a.out3")

### Amelia diagnostics
missmap(a.out3)


plot(a.out3, which.vars=1:6)
plot(a.out3, which.vars=7:12)
plot(a.out3, which.vars=13:18)
plot(a.out3, which.vars=19:24)
CairoPDF("imputationplots")
plot(a.out3)
dev.off()
CairoPDF("Overimputationplots-gdpcap_log")
overimpute(a.out3, var = "gdpcap_log")
dev.off()
#Other diagnostics
	#overimpute()
	#disperse()

#Pre-matching estimate of effect of colony/religion on NGOactive
imp1 <- a.out3$imputations[[1]]
attach(imp1)
treat <- factor(col_relig)
imp1 <- cbind(imp1, treat)
detach(imp1)

location  <- zelig(NGOactive ~ treat + pop_log, data=imp1,
		model="logit")
summary(location)

location.table <- xtable(location)
print(location.table,floating=FALSE)
#How should we present this?

#Pre-matching estimate of effect of exports on NGOaid_log
	#Transform exports back to original quantities
	imp1.1 <- imp1
	imp1.1$exports_tot_sc <- exp(imp1.1$export_ln)
	imp1.1 <- cbind(imp1.1,imp1.1$exports_tot_sc)
	#Condition on presence of NGOs
	imp1.1 <- subset(imp1.1,imp1.1$NGOactive==1)
dim(imp1.1)

exports.pre  <- zelig(NGOaid_log ~ exports_tot_sc+gdpcap_log+ polity2+ pop_log+
		+col_relig, data=imp1.1,
		model="normal")
summary(exports.pre)

exportspre.table <- xtable(exports.pre)
print(exportspre.table,floating=FALSE)

#Pre-matching estimate of Need as measured by the HDI coefficient on NGOaid_log
need  <- zelig(NGOaid_log ~ HDI2004 + pop_log, data=imp1.1,model="normal")
summary(need)
need.table <- xtable(need)
print(need.table,floating=FALSE)

#Pre-matching estimate of voting as measured UNGA vote on NGOaid_log
voting  <- zelig(NGOaid_log~UN_dichot+gdpcap_log+polity2+Bilaid_log+otherNGO+pop_log+Gini+ HDI2004+Freedom+law_ord+kauf_law+ti+treat, data=un.data,model="normal")
summary(voting)
voting.table <- xtable(voting)
print(voting.table,floating=FALSE)

###################################################
#################### CEM ###################
###################################################

#Assuming that we can justify dichotomous treatments

###################################
### Effect of col_relig on NGOactive
###################################
#Create dataset with only vars of interest
attach(imp1)
col.rel <-as.matrix(cbind(NGOactive, pop_log,col_relig))
dim(col.rel)
detach(imp1)

col.rel <- as.data.frame(col.rel)
attach(col.rel)
treat2 <- ifelse(col_relig>0,1,0)
col.rel <- cbind(col.rel,treat2)
col.rel$col_relig <- NULL
detach(col.rel)

#CEM
cem.match <- cem(treatment="treat2", data=col.rel, drop="NGOactive")
cem.match$imbalance
tab <- relax.cem(cem.match, col.rel, depth = 1, perc = 0.3)

#ATT from cem matched data
activeCemMatchedModel <- att(cem.match, NGOactive~treat2+pop_log, data=col.rel, model="logit")
print(xtable(t(activeCemMatchedModel$att.model)))

#Is this better than the regression result above?

###################################
### Effect of exports on NGOaid_log
###################################
rm(imp1.2)
#Create dataset with only vars of interest
imp1.2 <- a.out3$imputations[[1]]
treat <- factor(imp1.2$col_relig)
exports_tot_sc <- exp(imp1.2$export_ln)
imp1.2 <- cbind(imp1.2,exports_tot_sc,treat)
	#dichotomize at median of export_tot_sc distribution
exp_dichot <- ifelse(imp1.2$exports_tot_sc>=0.0002958,1,0)
length(exp_dichot)
imp1.2 <- cbind(imp1.2,exp_dichot)
imp1.2$exports_tot_sc <- NULL
imp1.2 <- subset(imp1.2,NGOactive==1)
dim(imp1.2)

vars.exp  <- c("NGOaid_log", "polity2", "pop_log","HDI2004",
	"colony","religionmatch2","exp_dichot")
exp.data  <- imp1.2[,vars.exp]

#CEM
cem.match2 <- cem(treatment="exp_dichot", data=exp.data, drop="NGOaid_log")
cem.match2$imbalance
#ATT from cem matched data
exportsCemMatchedModel <- att(cem.match2, NGOaid_log~exp_dichot+polity2+pop_log+
HDI2004+colony+religionmatch2, data=exp.data, model="linear")
print(xtable(t(exportsCemMatchedModel$att.model)))
CairoPDF(file="MatchingExportsNGOaid.pdf")
relax.cem(cem.match2,exp.data, depth = 1,perc = 0.3)
dev.off()


###################################
### Effect of UNGAvoting on NGOaid_log
###################################
#The philosophy here is to include all pre-treatment covariates in the model
#Create dataset with only vars of interest
	#dichotomize UNvoting of .5
UN_dichot <- ifelse(imp1.2$UNvoting>0.5,1,0)
imp1.2 <- cbind(imp1.2,UN_dichot)
dim(imp1.2)
vars.un  <- c("NGOaid_log", "polity2", 
 "pop_log","HDI2004","UN_dichot",
	"colony","religionmatch2")
un.data  <- imp1.2[,vars.un]
head(imp1.2)
#CEM
cem.un <- cem(treatment="UN_dichot", data=un.data, drop="NGOaid_log")
cem.un$imbalance
#ATT from cem matched data
votingCemMatchedModel <- att(cem.un, NGOaid_log~UN_dichot+polity2+pop_log+
HDI2004+colony+religionmatch2, data=un.data, model="linear")
print(xtable(t(votingCemMatchedModel$att.model)))
CairoPDF(file="MatchingVoteNGOaid.pdf")
relax.cem(cem.un,un.data, depth = 1,perc = 0.3)
dev.off()
###################################################
#################### MODEL RUNS ###################
###################################################

#GLM run with probit link for the models
col1model  <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log, family=binomial(link = "probit"), data=col1datared)
col2model  <- glm(NGOactive ~ polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + Gini, family=binomial(link = "probit"), data=col2datared)
col3model  <- glm(NGOactive ~ polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + HDI2004, family=binomial(link = "probit"), data=col3datared)
col4model  <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + Gini + HDI2004, family=binomial(link = "probit"), data=col4datared)
col5model  <- glm(NGOactive ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + Freedom, family=binomial(link = "probit"), data=col5datared)
col6model  <- glm(NGOactive ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + law_ord, family=binomial(link = "probit"), data=col6datared)
col7model  <- glm(NGOactive ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + kauf_law, family=binomial(link = "probit"), data=col7datared)
col8model  <- glm(NGOactive ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + ti, family=binomial(link = "probit"), data=col8datared)
col9model  <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + law_ord, family=binomial(link = "probit"), data=col9datared)
col10model <- glm(NGOactive ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + Freedom + kauf_law, family=binomial(link = "probit"), data=col10datared)
col11model <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + export_tot_sc, family=binomial(link = "probit"), data=col11datared)
col12model <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + UNvoting, family=binomial(link = "probit"), data=col12datared)
col13model <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + export_tot_sc + UNvoting, family=binomial(link = "probit"), data=col13datared)
col14model <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + religionmatch + pop_log + otherNGOamount_log, family=binomial(link = "probit"), data=col14datared)
col15model <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + otherNGOamount_log, family=binomial(link = "probit"), data=col15datared)
col16model <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO+ pop_log + colony, family=binomial(link = "probit"), data=col16datared)
col17model <- glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + colony, family=binomial(link = "probit"), data=col17datared)


#GLM for unrestricted model to calculate the pseudo likelihood
col1modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col1datared)
col2modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col2datared)
col3modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col3datared)
col4modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col4datared)
col5modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col5datared)
col6modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col6datared)
col7modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col7datared)
col8modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col8datared)
col9modelres  <- glm(NGOactive~1, family=binomial(link = "probit"), data=col9datared)
col10modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col10datared)
col11modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col11datared)
col12modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col12datared)
col13modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col13datared)
col14modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col14datared)
col15modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col15datared)
col16modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col16datared)
col17modelres <- glm(NGOactive~1, family=binomial(link = "probit"), data=col17datared)


#OLS run for the models
col1modelt2  <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log, data=col1dataactivet2)
col2modelt2  <- glm(NGOaid_log ~ polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + Gini, data=col2dataactivet2)
col3modelt2  <- glm(NGOaid_log ~ polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + HDI2004, data=col3dataactivet2)
col4modelt2  <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + Gini + HDI2004, data=col4dataactivet2)
col5modelt2  <- glm(NGOaid_log ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + Freedom, data=col5dataactivet2)
col6modelt2  <- glm(NGOaid_log ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + law_ord,  data=col6dataactivet2)
col7modelt2  <- glm(NGOaid_log ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + kauf_law, data=col7dataactivet2)
col8modelt2  <- glm(NGOaid_log ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + ti, data=col8dataactivet2)
col9modelt2  <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + law_ord, data=col9dataactivet2)
col10modelt2 <- glm(NGOaid_log ~ gdpcap_log + Bilaid_log + otherNGO + religionmatch + pop_log + Freedom + kauf_law, data=col10dataactivet2)
col11modelt2 <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + export_tot_sc, data=col11dataactivet2)
col12modelt2 <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + UNvoting, data=col12dataactivet2)
col13modelt2 <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + export_tot_sc + UNvoting, data=col13dataactivet2)
col14modelt2 <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + religionmatch + pop_log + otherNGOamount_log, data=col14dataactivet2)
col15modelt2 <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + otherNGOamount_log, data=col15dataactivet2)
col16modelt2 <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO+ pop_log + colony, data=col16dataactivet2)
col17modelt2 <- glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log + colony, data=col17dataactivet2)

#unrestricted OLS run for the models, although the R2 is calculated separetely, but keeping it here because clx will complain otherwise!!
col1modelt2res  <- glm(NGOaid_log ~ 1, data=col1dataactivet2)
col2modelt2res  <- glm(NGOaid_log ~ 1, data=col2dataactivet2)
col3modelt2res  <- glm(NGOaid_log ~ 1, data=col3dataactivet2)
col4modelt2res  <- glm(NGOaid_log ~ 1, data=col4dataactivet2)
col5modelt2res  <- glm(NGOaid_log ~ 1, data=col5dataactivet2)
col6modelt2res  <- glm(NGOaid_log ~ 1, data=col6dataactivet2)
col7modelt2res  <- glm(NGOaid_log ~ 1, data=col7dataactivet2)
col8modelt2res  <- glm(NGOaid_log ~ 1, data=col8dataactivet2)
col9modelt2res  <- glm(NGOaid_log ~ 1, data=col9dataactivet2)
col10modelt2res <- glm(NGOaid_log ~ 1, data=col10dataactivet2)
col11modelt2res <- glm(NGOaid_log ~ 1, data=col11dataactivet2)
col12modelt2res <- glm(NGOaid_log ~ 1, data=col12dataactivet2)
col13modelt2res <- glm(NGOaid_log ~ 1, data=col13dataactivet2)
col14modelt2res <- glm(NGOaid_log ~ 1, data=col14dataactivet2)
col15modelt2res <- glm(NGOaid_log ~ 1, data=col15dataactivet2)
col16modelt2res <- glm(NGOaid_log ~ 1, data=col16dataactivet2)
col17modelt2res <- glm(NGOaid_log ~ 1, data=col17dataactivet2)


###################################################
#################### MODEL OUTPUTS ################
###################################################
# output the coefficients, clustered Z-scores, number of observations and r-squared value for the probit model
clustprobitcol1model  <- clx(col1model,col1modelres,1,col1datared$countrycode)
clustprobitcol2model  <- clx(col2model,col2modelres,1,col2datared$countrycode)
clustprobitcol3model  <- clx(col3model,col3modelres,1,col3datared$countrycode)
clustprobitcol4model  <- clx(col4model,col4modelres,1,col4datared$countrycode)
clustprobitcol5model  <- clx(col5model,col5modelres,1,col5datared$countrycode)
clustprobitcol6model  <- clx(col6model,col6modelres,1,col6datared$countrycode)
clustprobitcol7model  <- clx(col7model,col7modelres,1,col7datared$countrycode)
clustprobitcol8model  <- clx(col8model,col8modelres,1,col8datared$countrycode)
clustprobitcol9model  <- clx(col9model,col9modelres,1,col9datared$countrycode)
clustprobitcol10model <- clx(col10model,col10modelres,1,col10datared$countrycode)
clustprobitcol11model <- clx(col11model,col11modelres,1,col11datared$countrycode)
clustprobitcol12model <- clx(col12model,col12modelres,1,col12datared$countrycode)
clustprobitcol13model <- clx(col13model,col13modelres,1,col13datared$countrycode)
clustprobitcol14model <- clx(col14model,col14modelres,1,col14datared$countrycode)
clustprobitcol15model <- clx(col15model,col15modelres,1,col15datared$countrycode)
clustprobitcol16model <- clx(col16model,col16modelres,1,col16datared$countrycode)
clustprobitcol17model <- clx(col17model,col17modelres,1,col17datared$countrycode)

# output the coefficients, clustered Z-scores, number of observations and r-squared value for the OLS model
clustolscol1model  <- clx(col1modelt2,col1modelt2res,1,col1dataactivet2$countrycode)
clustolscol2model  <- clx(col2modelt2,col2modelt2res,1,col2dataactivet2$countrycode)
clustolscol3model  <- clx(col3modelt2,col3modelt2res,1,col3dataactivet2$countrycode)
clustolscol4model  <- clx(col4modelt2,col4modelt2res,1,col4dataactivet2$countrycode)
clustolscol5model  <- clx(col5modelt2,col5modelt2res,1,col5dataactivet2$countrycode)
clustolscol6model  <- clx(col6modelt2,col6modelt2res,1,col6dataactivet2$countrycode)
clustolscol7model  <- clx(col7modelt2,col7modelt2res,1,col7dataactivet2$countrycode)
clustolscol8model  <- clx(col8modelt2,col8modelt2res,1,col8dataactivet2$countrycode)
clustolscol9model  <- clx(col9modelt2,col9modelt2res,1,col9dataactivet2$countrycode)
clustolscol10model <- clx(col10modelt2,col10modelt2res,1,col10dataactivet2$countrycode)
clustolscol11model <- clx(col11modelt2,col11modelt2res,1,col11dataactivet2$countrycode)
clustolscol12model <- clx(col12modelt2,col12modelt2res,1,col12dataactivet2$countrycode)
clustolscol13model <- clx(col13modelt2,col13modelt2res,1,col13dataactivet2$countrycode)
clustolscol14model <- clx(col14modelt2,col14modelt2res,1,col14dataactivet2$countrycode)
clustolscol15model <- clx(col15modelt2,col15modelt2res,1,col15dataactivet2$countrycode)
clustolscol16model <- clx(col16modelt2,col16modelt2res,1,col16dataactivet2$countrycode)
clustolscol17model <- clx(col17modelt2,col17modelt2res,1,col17dataactivet2$countrycode)

#write to table columns
write.table(clustprobitcol1model, "table1col1txt", sep = "\t")
write.table(clustprobitcol2model, "table1col2txt", sep = "\t")
write.table(clustprobitcol3model, "table1col3txt", sep = "\t")
write.table(clustprobitcol4model, "table1col4txt", sep = "\t")
write.table(clustprobitcol5model, "table1col5txt", sep = "\t")
write.table(clustprobitcol6model, "table1col6txt", sep = "\t")
write.table(clustprobitcol7model, "table1col7txt", sep = "\t")
write.table(clustprobitcol8model, "table1col8txt", sep = "\t")
write.table(clustprobitcol9model, "table1col9txt", sep = "\t")
write.table(clustprobitcol10model, "table1col10txt", sep = "\t")
write.table(clustprobitcol11model, "table1col11txt", sep = "\t")
write.table(clustprobitcol12model, "table1col12txt", sep = "\t")
write.table(clustprobitcol13model, "table1col13txt", sep = "\t")
write.table(clustprobitcol14model, "table1col14.txt", sep = "\t")
write.table(clustprobitcol15model, "table1col15.txt", sep = "\t")
write.table(clustprobitcol16model, "table1col16.txt", sep = "\t")
write.table(clustprobitcol17model, "table1col17.txt", sep = "\t")


write.table(clustolscol1model, "table2col1txt", sep = "\t")
write.table(clustolscol2model, "table2col2txt", sep = "\t")
write.table(clustolscol3model, "table2col3txt", sep = "\t")
write.table(clustolscol4model, "table2col4txt", sep = "\t")
write.table(clustolscol5model, "table2col5txt", sep = "\t")
write.table(clustolscol6model, "table2col6txt", sep = "\t")
write.table(clustolscol7model, "table2col7txt", sep = "\t")
write.table(clustolscol8model, "table2col8txt", sep = "\t")
write.table(clustolscol9model, "table2col9txt", sep = "\t")
write.table(clustolscol10model, "table2col10txt", sep = "\t")
write.table(clustolscol11model, "table2col11txt", sep = "\t")
write.table(clustolscol12model, "table2col12txt", sep = "\t")
write.table(clustolscol13model, "table2col13txt", sep = "\t")
write.table(clustolscol14model, "table1col14.txt", sep = "\t")
write.table(clustolscol15model, "table1col15.txt", sep = "\t")
write.table(clustolscol16model, "table1col16.txt", sep = "\t")
write.table(clustolscol17model, "table1col17.txt", sep = "\t")


#display and check the outputs
clustprobitcol1model  
clustprobitcol2model 
clustprobitcol3model 
clustprobitcol4model 
clustprobitcol5model  
clustprobitcol6model 
clustprobitcol7model 
clustprobitcol8model 
clustprobitcol9model 
clustprobitcol10model
clustprobitcol11model 
clustprobitcol12model 
clustprobitcol13model 

clustolscol1model  
clustolscol2model  
clustolscol3model  
clustolscol4model 
clustolscol5model  
clustolscol6model  
clustolscol7model  
clustolscol8model 
clustolscol9model 
clustolscol10model 
clustolscol11model 
clustolscol12model
clustolscol13model

###################################################
#################### MODEL SELECTION ###################
###################################################

						###For NGOactive###
########## By adj R2 ###############

data1 <- a.out$imputations[[1]]
attach(data1)
dat.mat <-as.matrix(cbind(gdpcap_log, polity2, Bilaid_log, otherNGO, religionmatch, pop_log,Gini,HDI2004,Freedom,
		law_ord,kauf_law,ti,export_tot_sc,UNvoting,otherNGOamount_log,colony))
dim(dat.mat)
detach(data1)

#by adj.R2
best.adjr2 <- leaps(dat.mat, data1$NGOactive, method="adjr2")
best.adjr2$adjr2

best3.ind<-order(best.adjr2$adjr2, decreasing=TRUE)[1:3]
dimnames(best.adjr2$which)[[2]]<-dimnames(dat.mat)[[2]]
best.adjr2$which[best3.ind,]

### Model with best R2: NGOactive ~ Bilaid_log + otherNGO + religionmatch + pop_log +
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+colony

							####For NGOaid_log###
########## By adj R2 ###############

data1 <- a.out$imputations[[1]]
#code to subset
	#mydataframe[mydataframe$col1 == 1, ]
	# take out the row where "col1" is 1

attach(data1)
dat.mat <-as.matrix(cbind(gdpcap_log, polity2, Bilaid_log, otherNGO, religionmatch, pop_log,Gini,HDI2004,Freedom,
		law_ord,kauf_law,ti,export_tot_sc,UNvoting,otherNGOamount_log,colony))
dim(dat.mat)
detach(data1)

#by adj.R2
best.adjr2 <- leaps(dat.mat, data1$NGOaid_log, method="adjr2")
best.adjr2$adjr2

best3.ind<-order(best.adjr2$adjr2, decreasing=TRUE)[1:3]
dimnames(best.adjr2$which)[[2]]<-dimnames(dat.mat)[[2]]
best.adjr2$which[best3.ind,]

### Model with best R2: NGOaid_log ~ Bilaid_log + otherNGO + religionmatch + pop_log+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+colony

								###For NGOactive###
########## Forward/Backward/Stepwise ###############


## Forward
attach(data1)
step.forw_act <- step(glm(NGOactive ~ 1), ~gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony, direction="forward" )
detach(data1)

## Backward

attach(data1)
step.back_act <- step(glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony), direction="backward" )
detach(data1)

## Both

attach(data1)
step.both_act <- step(glm(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony), direction="both" )
detach(data1)

### Model selected by all three methods: 
#		NGOactive ~ Bilaid_log + otherNGO + religionmatch + pop_log + 
#    				law_ord + UNvoting + colony

# probit regression, post-imputation, using selected model
post.imp_sel  <- zelig(NGOactive ~ Bilaid_log + otherNGO + religionmatch + pop_log+
		law_ord+UNvoting+colony, data=a.out$imputations,
		model="probit")
summary(post.imp_sel)


							###For NGOaid_log###
########## Forward/Backward/Stepwise ###############

## Forward
attach(data1)
step.forw_aid <- step(glm(NGOaid_log ~ 1), ~gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony, direction="forward" )
detach(data1)

## Backward

attach(data1)
step.back_aid <- step(glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony), direction="backward" )
detach(data1)

## Both

attach(data1)
step.both_aid <- step(glm(NGOaid_log ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony), direction="both" )
detach(data1)

### Model selected by all three methods: 
#	NGOaid_log ~ Bilaid_log + otherNGO + religionmatch + pop_log + 
#	    law_ord + export_tot_sc + UNvoting + colony

data1[1:20,]

# pre-imputation, full model
pre.imp  <- zelig(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony, data=replidata3,
		model="probit")
summary(pre.imp)

# post-imputation, full model
post.imp  <- zelig(NGOactive ~ gdpcap_log + polity2 + Bilaid_log + otherNGO + religionmatch + pop_log+Gini+HDI2004+Freedom+
		law_ord+kauf_law+ti+export_tot_sc+UNvoting+otherNGOamount_log+colony, data=a.out$imputations,
		model="probit")
summary(post.imp)
